from database.db_handler import MysqlHander
from common.my_http import MyHttp
import urllib.request
from bs4 import BeautifulSoup
from common.market_data import MarketData
import glob
import jieba
import re
import threading
import hashlib
import json
from common.entity_data import EntityData
from database.db_business import DbBusiness

'''
来源：https://www.xuanxiaodi.com/ranks/1921-1.html?hmsr=bd_rankcard
'''

class DaxuepaimingQs:
    """Collect university names from the xuanxiaodi.com ranking API (list id 1921).

    Names are accumulated in ``self.entity_sets`` as one string in which every
    name is preceded by ``"|"`` (e.g. ``"|A|B"``), and are then handed to
    ``EntityData`` by :meth:`get_data`.

    Note that ``entity_sets`` is only initialised in ``__init__``; it keeps
    growing across repeated calls on the same instance.
    """

    def __init__(self):
        self.business = DbBusiness()
        self.entity = EntityData()
        # "|"-prefixed concatenation of every name collected so far.
        self.entity_sets = ""

    def get_url(self, i: int) -> str:
        """Return the ranking-list API URL for page ``i`` (page size 20)."""
        return (
            "https://www.xuanxiaodi.com/api/school/ranking/list"
            "?id=1921&regionId=&page=" + str(i) + "&pageSize=20"
        )

    def _append_names(self, payload) -> None:
        """Append every ``chineseName`` found in ``payload`` to ``entity_sets``.

        ``payload`` is the decoded JSON response; rows are read from
        ``payload["data"]["data"]``. The fragment for the whole page is built
        before ``entity_sets`` is touched, so a malformed row (missing or
        non-string ``chineseName``) raises without leaving a half-appended
        page behind. That matters because :meth:`get_data` retries the same
        page on failure, which would otherwise duplicate the leading names.

        Raises:
            KeyError: if an expected key is missing.
            TypeError: if the payload shape or a name is not as expected.
        """
        rows = payload["data"]["data"]
        fragment = "".join("|" + row["chineseName"] for row in rows)
        self.entity_sets += fragment

    def get_online_data(self, url: str) -> None:
        """Fetch one page from ``url`` and add its names to ``entity_sets``.

        A proxy is requested from ``self.business.query_proxy()`` for every
        call and passed to ``MyHttp.http_json_data_v2`` together with the URL;
        the response is decoded with ``json.loads``. Any exception raised by
        the proxy lookup, the request, the JSON decoding or the payload
        parsing propagates to the caller.
        """
        proxy = self.business.query_proxy()
        payload = json.loads(MyHttp.http_json_data_v2(url, proxy))
        self._append_names(payload)

    def get_data(self, first_page: int = 1, last_page: int = 65, max_attempts: int = 5) -> None:
        """Download every ranking page and store the collected names.

        Args:
            first_page: First page number to request (inclusive).
            last_page: Last page number to request (inclusive).
            max_attempts: How many times a single page is tried before the
                last error is re-raised. Must be at least 1.

        Raises:
            ValueError: if ``max_attempts`` is smaller than 1.
            Exception: whatever the final failed attempt for a page raised.
        """
        if max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")

        for page in range(first_page, last_page + 1):
            url = self.get_url(page)
            for attempt in range(1, max_attempts + 1):
                try:
                    self.get_online_data(url)
                    break
                except Exception:
                    # Give up only after the final attempt; the bare raise
                    # keeps the original exception and traceback intact.
                    if attempt == max_attempts:
                        raise

        # The string arguments read "QS World University Rankings" and
        # "Source: 2022 QS World University Rankings".
        # NOTE(review): add and update are called with identical arguments,
        # presumably to behave like an upsert; the meaning of 500 / 5000001 /
        # "QS" / "" is defined by EntityData - confirm there.
        self.entity.add_entity_data("QS世界大学排名", "来源：2022QS世界大学排名", 500, 5000001, "QS", "", self.entity_sets)
        self.entity.update_entity_data("QS世界大学排名", "来源：2022QS世界大学排名", 500, 5000001, "QS", "", self.entity_sets)


def daxuepaiming_qs_api():
    """Build a ``DaxuepaimingQs`` collector and run its ``get_data`` step."""
    DaxuepaimingQs().get_data()

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    daxuepaiming_qs_api()
